import re
from builtins import object
from urllib import request
from urllib.request import Request


class Pbug(object):
    """Fetch a single URL and extract every regex match from its body.

    Typical use: construct with a URL, assign ``pattern``, then call
    ``startPbug()``.
    """

    # Class-level defaults; instances overwrite them via __init__,
    # setFilePpth() and direct assignment to ``pattern``.
    path = r""
    url = r""
    pattern = ""

    def __init__(self, url):
        """Remember the URL that ``startPbug`` will download."""
        self.url = url

    def setFilePpth(self, path):
        """Store an output file path on the instance.

        The (misspelt) method name is kept as-is so existing callers keep
        working. NOTE: nothing in this class currently reads ``self.path``.
        """
        self.path = path

    def startPbug(self):
        """Download ``self.url`` and search the body with ``self.pattern``.

        The raw body and the list of matches are both printed, and the
        matches are also returned so callers can use them directly.

        Returns:
            The result of ``re.findall`` (with ``re.S``) over the decoded
            response body.

        Raises:
            urllib.error.URLError: if the request fails.
            re.error: if ``self.pattern`` is not a valid regular expression.
        """
        # The header *value* must be only the UA string; repeating the header
        # name inside the value sends a malformed User-Agent.
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                                 "(KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36"}

        req = Request(self.url, headers=headers)
        # Context manager guarantees the response is closed, even if reading
        # the body raises.
        with request.urlopen(req) as response:
            result = response.read()
            # Honour the charset declared in Content-Type, else assume UTF-8.
            charset = response.headers.get_content_charset() or "utf-8"

        print(result)

        # Decode to real text before matching: str(bytes) would give the
        # "b'...'" repr with \xNN escapes instead of the page's characters.
        try:
            text = result.decode(charset, errors="replace")
        except LookupError:
            # The server announced a codec name Python does not know.
            text = result.decode("utf-8", errors="replace")

        re_result = re.compile(self.pattern, re.S).findall(text)
        print(re_result)
        return re_result


# Demo run, executed whenever this module is run or imported (there is no
# __main__ guard): scrape one Baidu Wenku document page.
pbug = Pbug(url="https://wenku.baidu.com/view/7b375e393b3567ec102d8ae6.html")
# Output path is stored on the instance before the scrape starts.
pbug.setFilePpth(path=r"E:\filework\lunwen.txt")
# Lazily match from each opening reader-word <p ...> tag up to the next "</".
pbug.pattern = r'<p class="reader-word-layer reader-word".*?</'
pbug.startPbug()





